# Download every row of a Socrata-style JSON endpoint, one page at a time.
#
# Arguments:
#   url         Endpoint URL handed to GET().
#   chunk_size  Number of rows requested per page through `$limit`.
#               Defaults to 50000, the value that used to be hard-coded.
#
# Returns a list holding one tibble per page fetched, in request order.
# Callers are expected to combine the pages themselves (e.g. bind_rows()).
#
# Stops with an error if any request comes back with an HTTP error status,
# instead of trying to parse the error body as inspection data.
#
# NOTE(review): pages are ordered by `zipcode` only, which is presumably not
# unique per row; confirm the API returns ties in a stable order across
# requests, otherwise rows could be duplicated or skipped between pages.
get_all_inspections = function(url, chunk_size = 50000) {
  # Work with an integer page size: integers are rendered as plain digits,
  # whereas a double such as 1e5 would be rendered as "1e+05" if converted
  # to text while the query string is built.
  chunk_size = as.integer(chunk_size)
  stopifnot(length(chunk_size) == 1, !is.na(chunk_size), chunk_size > 0)

  all_inspections = list()
  loop_index = 1

  repeat {
    message("Getting data, page ", loop_index)

    response =
      GET(url,
          query = list(`$order` = "zipcode",
                       `$limit` = chunk_size,
                       `$offset` = as.integer((loop_index - 1) * chunk_size)
          )
      )

    # Fail loudly on 4xx/5xx rather than silently returning a partial result.
    stop_for_status(response)

    page =
      response %>%
      # Explicit encoding avoids the "No encoding supplied" guess.
      content("text", encoding = "UTF-8") %>%
      fromJSON() %>%
      as_tibble()

    all_inspections[[loop_index]] = page

    # A page that is not completely full is the last one.
    if (nrow(page) != chunk_size) {
      break
    }
    loop_index = loop_index + 1
  }

  all_inspections
}
# NYC restaurant inspection results endpoint (JSON).
url = "https://data.cityofnewyork.us/resource/43nn-pn8j.json"

# Fetch all pages and stack them into a single data frame.
nyc_inspections = bind_rows(get_all_inspections(url))
## Getting data, page 1
## Getting data, page 2
## Getting data, page 3
## Getting data, page 4
## Getting data, page 5
## Getting data, page 6
## Getting data, page 7
## Getting data, page 8
## Getting data, page 9
# Data set shared by the three charts: only the columns they use, restricted
# to inspections in Manhattan that received a letter grade of A, B or C.
nyc_inspections_df =
  nyc_inspections %>%
  select(boro, cuisine_description, inspection_date, violation_code, score, grade) %>%
  filter(
    # `%in%` is never TRUE for NA, so rows with a missing grade are removed
    # here as well; a separate drop_na(grade) step is not needed.
    grade %in% c("A", "B", "C"),
    boro == "Manhattan") %>%
  # The score appears to arrive as text from the JSON endpoint (TODO confirm);
  # convert it so plots treat it as a continuous measurement rather than a
  # set of discrete labels. A no-op if the column is already numeric.
  mutate(score = as.numeric(score))
Column
Chart A
# Chart A: box plot of inspection score for each letter grade,
# one colour per grade taken from the viridis palette.
plot_ly(
  nyc_inspections_df,
  x = ~grade,
  y = ~score,
  color = ~grade,
  type = "box",
  colors = "viridis"
)
## Warning: `arrange_()` is deprecated as of dplyr 0.7.0.
## Please use `arrange()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
Column
Chart B
# Chart B: horizontal bar chart of how often each violation code occurs.
count(nyc_inspections_df, violation_code) %>%
  # Order the codes by their frequency so the bars appear sorted.
  mutate(violation_code = fct_reorder(violation_code, n)) %>%
  plot_ly(
    x = ~n,
    y = ~violation_code,
    color = ~violation_code,
    type = "bar",
    colors = "viridis"
  )
## Warning: Ignoring 1 observations
Chart C
# Chart C: overlaid density curves of the inspection score, one per grade.
# The ggplot object is built first and then converted to an interactive
# plotly widget.
score_distribution =
  ggplot(nyc_inspections_df, aes(x = score, fill = grade)) +
  geom_density(alpha = .4, adjust = .5, color = "blue")

ggplotly(score_distribution)
## Warning: Groups with fewer than two data points have been dropped.
## Warning: Groups with fewer than two data points have been dropped.
## Warning: Groups with fewer than two data points have been dropped.
## Warning: Groups with fewer than two data points have been dropped.